home *** CD-ROM | disk | FTP | other *** search
/ MacHack 1998 / MacHack 1998.toast / Sessions / Opitmizing PowerPC Code / CycleCounter / CycleCounter.c next >
Encoding:
C/C++ Source or Header  |  1998-06-20  |  17.4 KB  |  753 lines  |  [TEXT/CWIE]

  1. /*
  2.     File:        604Profile.c
  3.     
  4.     Contains:    Routines to time small test routines at the instruction dispatch level.
  5.  
  6.     Written by:    Mike Neil
  7.     
  8.     Copyright:    This code is PUBLIC DOMAIN as of Friday June 19, 1998
  9.  
  10.     Note:    The code you want to modify is at TestCode()
  11.     
  12.             main() is set up for G3 processors.  The constant "11" in the line:
  13.                 testState.time.pmc1 = testState.time.pmc1 - 11;
  14.             has to be modified slightly for other processors, so that the first
  15.             instruction is cycle 1 at instruction 1.
  16.             
  17.             The cycle counter is not available on 601 or 603 series processors, so this
  18.             code doesn't work on those machines.
  19.  
  20. */
  21.  
  22. #include     <stdio.h>
  23. #include     <stdlib.h>
  24. #include     <time.h>
  25. #include     <string.h>
  26. #include     <Types.h>
  27. #include     "Disassembler.h"
  28.  
  29.  
  30. //    Defines for the Disassembler
  31. enum {
  32.      kStandardDisAsmOptions = 
  33. //        Disassemble_Power         |
  34.         Disassemble_PowerPC32     |
  35. //        Disassemble_PowerPC64    |
  36. //        Disassemble_PowerPC601    |
  37.         Disassemble_RsvBitsErr    |
  38.         Disassemble_FieldErr    |
  39.         Disassemble_Extended    |
  40. //        Disassemble_DecSI        |
  41. //        Disassemble_DecUI        |
  42.         Disassemble_DecField    |
  43. //        Disassemble_DecOffset    |
  44. //        Disassemble_DecPCRel    |
  45. //        Disassemble_Hex2sComp    |
  46. //        Disassemble_MinHex        |
  47. //        Disassemble_CRBits        |
  48. //        Disassemble_CRFltBits    |
  49.         Disassemble_BranchBO    |
  50.         Disassemble_TrapTO
  51. };
  52.  
  53.  
  54. //    Structs for register state
  55. struct GeneralPurposeRegisters {
  56.     UInt32                    R0;
  57.     UInt32                    R1;
  58.     UInt32                    SP;
  59.     UInt32                    R3;
  60.     UInt32                    R4;
  61.     UInt32                    R5;
  62.     UInt32                    R6;
  63.     UInt32                    R7;
  64.     UInt32                    R8;
  65.     UInt32                    R9;
  66.     UInt32                    reserved1;
  67.     UInt32                    reserved2;
  68.     UInt32                    reserved3;
  69.     UInt32                    R13;
  70.     UInt32                    R14;
  71.     UInt32                    R15;
  72.     UInt32                    R16;
  73.     UInt32                    R17;
  74.     UInt32                    R18;
  75.     UInt32                    R19;
  76.     UInt32                    R20;
  77.     UInt32                    R21;
  78.     UInt32                    R22;
  79.     UInt32                    R23;
  80.     UInt32                    R24;
  81.     UInt32                    R25;
  82.     UInt32                    R26;
  83.     UInt32                    R27;
  84.     UInt32                    R28;
  85.     UInt32                    R29;
  86.     UInt32                    R30;
  87.     UInt32                    R31;
  88. };
  89. typedef struct GeneralPurposeRegisters GeneralPurposeRegisters;
  90.  
  91.  
  92. struct FloatingPointRegisters {
  93.     double                    FPR0;
  94.     double                    FPR1;
  95.     double                    FPR2;
  96.     double                    FPR3;
  97.     double                    FPR4;
  98.     double                    FPR5;
  99.     double                    FPR6;
  100.     double                    FPR7;
  101.     double                    FPR8;
  102.     double                    FPR9;
  103.     double                    FPR10;
  104.     double                    FPR11;
  105.     double                    FPR12;
  106.     double                    FPR13;
  107.     double                    FPR14;
  108.     double                    FPR15;
  109.     double                    FPR16;
  110.     double                    FPR17;
  111.     double                    FPR18;
  112.     double                    FPR19;
  113.     double                    FPR20;
  114.     double                    FPR21;
  115.     double                    FPR22;
  116.     double                    FPR23;
  117.     double                    FPR24;
  118.     double                    FPR25;
  119.     double                    FPR26;
  120.     double                    FPR27;
  121.     double                    FPR28;
  122.     double                    FPR29;
  123.     double                    FPR30;
  124.     double                    FPR31;
  125. };
  126. typedef struct FloatingPointRegisters FloatingPointRegisters;
  127.  
  128.  
  129. struct MachineState {
  130.     UInt32                    LR;
  131.     UInt32                    CTR;
  132.     UInt32                    CR;
  133.     UInt32                    XER;
  134. };
  135. typedef struct MachineState MachineState;
  136.  
  137.  
  138. /*
  139.     The MMCR0 bit field definitions
  140.  
  141.                       E
  142.                       N
  143.             D     D D I   R
  144.             I D D M M N   T
  145.             S P U S R T   C    Thresh       PMC1Sel PMC2Sel
  146.             0 0 0 0 0 0 0 00 0 111111 1 1 1 1222222 222233
  147.             0 1 2 3 4 5 6 78 9 012345 6 7 8 9012345 678901
  148.  
  149.     mmcr0    0 1 0 0 0 0 1 00 0 000001 0 0 0 0000001 000100
  150. */
  151.  
  152. struct TimingInfo604 {
  153.     UInt32        mmcr0_On;
  154.     UInt32        mmcr0_Off;
  155.     SInt32        pmc1;
  156.     SInt32        pmc2;
  157. };
  158. typedef struct TimingInfo604 TimingInfo604;
  159.  
  160.  
  161. //    General test data structure, shared by asm code.
  162. struct TimingState
  163. {
  164.     TimingInfo604                time;            //    16 bytes    0    offset
  165.     MachineState                machine;        //    16 bytes    16
  166.     GeneralPurposeRegisters        gpr;            //    128 bytes    32
  167.     FloatingPointRegisters        fpr;            //    256 bytes    160
  168.     MachineState                save_machine;    //    16 bytes    416
  169.     GeneralPurposeRegisters        save_gpr;        //    128 bytes    432
  170.     FloatingPointRegisters        save_fpr;        //    256 bytes    560
  171.     MachineState                end_machine;    //    16 bytes    816
  172.     GeneralPurposeRegisters        end_gpr;        //    128 bytes    832
  173. };
  174. typedef struct TimingState TimingState;
  175.  
  176.  
  177. //    Size of TestHarness prolog and epilog 
  178. enum
  179. {
  180.     kDUMP_END_STATE        = 0,
  181.     kHarnessPrologSize    = (480L),
  182.     kHarnessEpilogSize    = ((34L * 4L) + 160L),
  183. };
  184.  
  185.  
  186. //    Local prototypes
  187. void            TestHarness(void);
  188. void            TestCode(void);
  189.  
  190. void            ExecuteTest(TimingState *r3, UInt32 *r4);
  191. void            SetUpTestState(TimingState *testState);
  192.  
  193. UInt32            MaskInterrupts(void);
  194. void             RestoreInterrupts(UInt32 oldSR);
  195.  
  196. static UInt32    sDataAddress;
  197.  
  198.  
  199. /*------------------------------------------------------------------
  200.     SetUpTestState    [internal]
  201.     
  202.     This function initializes the state of the registers and memory
  203.     for the test run.  It is called once before each test run.
  204. ------------------------------------------------------------------*/
  205.  
  206. void
  207. SetUpTestState(TimingState *testState)
  208. {
  209.     UInt32    *data;
  210.     
  211.     testState->machine.CR = 0x02000000;
  212.     testState->gpr.R3 = 0;
  213.     testState->gpr.R4 = 0;
  214.     testState->gpr.R5 = 0;
  215.     testState->gpr.R6 = 0;
  216.     testState->gpr.R7 = 0;
  217.     testState->gpr.R8 = 0;
  218.     testState->gpr.R9 = 0;
  219.     testState->gpr.R29 = 0;
  220.     testState->gpr.R31 = sDataAddress;
  221.     
  222.     data = (UInt32 *)(sDataAddress);
  223.     data[5] = sDataAddress + 4;                //    value for r3 in: lwz r3,0x0014(r31)
  224.     
  225.     testState->fpr.FPR15 = 10.0;
  226.     testState->fpr.FPR14 = 1.0;
  227. }
  228.  
  229.  
  230. /*------------------------------------------------------------------
  231.     TestCode    [internal]
  232.     
  233.     This is the code that will be tested for each run.
  234.     kTestSize defines the length of the test in bytes (inst# * 4).
  235. ------------------------------------------------------------------*/
  236.  
  237. enum
  238. {
  239.     kTestSize            = (16L * 4L)
  240. };
  241.  
  242. asm
  243. void
  244. TestCode(void)
  245. {
            // The instruction sequence being measured.  This routine is never
            // called: main() copies its first `index` bytes (4, 8, ... up to
            // kTestSize) between the harness prolog and epilog and runs the copy.
            // Exactly one of the #if sections below should be enabled at a time.
  246. #if 0        // various instructions
  247.     add        r3,r3,r4
  248.     mtcrf    0x01,r3
  249.     mtcrf    0xFF,r3
  250.     mcrxr    cr0
  251.     mtlr    r3
  252.     mtctr    r3
  253.     cror    0,1,2
  254.     add        r3,r3,r3
  255.     cror    0,4,8
  256.     add        r3,r3,r3
  257. #endif
  258.  
  259. #if 0        // this shows how slow the overflow-recording add (addo) can be.
  260.     lis        r4,0x1000
  261.     addo    r4,r4,r4
  262.     addo    r4,r4,r4
  263.     addo    r4,r4,r4
  264.     lis        r4,0x1000
  265.     addo    r4,r4,r4
  266.     addo    r4,r4,r4
  267.     addo    r4,r4,r4
  268. #endif
  269.  
  270. #if 0        // this is a somewhat slow set of store instructions.
  271.     lis        r4,0x10
  272.     stw        r4,0x40(r0)
  273.     lis        r4,0x10
  274.     lis        r5,0x10
  275.     stw        r6,0x44(r0)
  276.     lis        r4,0x10
  277.     lis        r5,0x10
  278.     stw        r6,0x48(r0)
  279.     lis        r4,0x10
  280.     lis        r5,0x10
  281.     stw        r6,0x48(r0)
  282. #endif
  283. #if 0        // this is an optimized version of those store instructions.
  284.     lis        r4,0x10
  285.     stw        r4,0x40(r0)
  286.     lis        r4,0x10
  287.     stw        r6,0x44(r0)
  288.     lis        r5,0x10
  289.     stw        r6,0x48(r0)
  290.     lis        r4,0x10
  291.     stw        r6,0x48(r0)
  292.     lis        r5,0x10
  293.     lis        r4,0x10
  294.     lis        r5,0x10
  295. #endif
  296.  
  297. #if 1        // This shows how breathtakingly slow a load can be if you
  298.             // recently stored to that address.  This turns out to be
  299.             // slow even if the load and store differ by a multiple of
  300.             // 4K.  (at least on a G3)
            // NOTE(review): these loads/stores name r0 as the base register;
            // PowerPC D-form addressing treats a base of r0 as the literal
            // value 0, so they look like absolute low-memory addresses
            // (0x40, 0x48, 0x1048) rather than offsets into the data page
            // that SetUpTestState puts in r31 -- confirm this is intended.
  301.     lis        r4,0x10
  302.     stw        r4,0x40(r0)
  303.     lis        r4,0x10
  304.     lwz        r6,0x40(r0)
  305.     lis        r5,0x10
  306.     stw        r6,0x48(r0)
  307.     lis        r4,0x10
  308.     lwz        r6,0x1048(r0)
  309.     lis        r5,0x10
  310.     lis        r4,0x10
  311.     lis        r5,0x10
  312. #endif
  313.  
  314.         // Padding.  main() copies up to kTestSize bytes from the start of this
  315.         // routine, so the "blr" at its end must never fall inside that range:
  316.         // if it were copied and executed, the machine would crash by branching
            // to no place in particular.  These harmless instructions keep it out of reach.
  317.     li        r3,0
  318.     li        r3,0
  319.     li        r3,0
  320.     li        r3,0
  321.     li        r3,0
  322.     li        r3,0
  323.     li        r3,0
  324.     li        r3,0
  325.     li        r3,0
  326.     li        r3,0
  327. }
  328.  
  329.  
  330. /*------------------------------------------------------------------
  331.     main
  332.     
  333.     This will setup the test environment and run the tests,
  334.     each test is run three times to remove ICache and DCache
  335.     Issues.  The code is page aligned and the test code is
  336.     cache line aligned.
  337. ------------------------------------------------------------------*/
  338.  
  339. void
  340. main(void)
  341. {
  342.     TimingState                testState;
  343.     UInt32                    SR;
  344.     float                    ipc;
  345.     Ptr                        codePagePtr;
  346.     UInt32                    *codePage;
  347.     Ptr                        dataPagePtr;
  348.     UInt32                    *dataPage;
  349.     UInt32                    srcCodeAddress;
  350.     UInt32                    codeAddress;
  351.     double                    idpc;
  352.     UInt32                    index;
  353.     char                    mnemonic[256];
  354.     char                    operand[256];
  355.     char                    comment[256];
  356.     DisassemblerStatus         status;
  357.     UInt32                    lastCycle;
  358.     
  359.     printf ("Welcome, you've got code.\n\n");
  360.     
  361.     codePagePtr = NewPtrClear(4096L * 2);                                    //    Allocate space for the code
  362.     codePage = (UInt32 *)(((UInt32)(codePagePtr) + 0xFFF) & 0xFFFFF000);    //    align it to a page.
  363.     
  364.     dataPagePtr = NewPtrClear(4096L * 4);                                    //    Allocate space for the data
  365.     dataPage = (UInt32 *)((((UInt32)(codePagePtr) + 0xFFF) & 0xFFFFF000) + 4096L);
  366.     
  367.     sDataAddress = (UInt32)(dataPage);
  368.     
  369.     lastCycle = 0;
  370.     
  371.     printf("----Test Start----\n");
  372.     
  373.     for (index = 4; index <= kTestSize; index = index + 4)
  374.     {
  375.         srcCodeAddress = *(UInt32 *)(TestHarness);                                //    Copy the prolog of the test into the buffer
  376.         codeAddress = (UInt32)codePage;
  377.         
  378.         BlockMove((void *)(srcCodeAddress), (void *)(codeAddress), kHarnessPrologSize);
  379.         MakeDataExecutable((void *)(codeAddress), kHarnessPrologSize);
  380.         
  381.         srcCodeAddress = *(UInt32 *)(TestCode);                                    //    Copy in the test code
  382.         codeAddress = codeAddress + kHarnessPrologSize;
  383.         
  384.         BlockMove((void *)(srcCodeAddress), (void *)(codeAddress), index);
  385.         MakeDataExecutable((void *)(codeAddress), index);
  386.  
  387.         status = ppcDisassembler(    (UInt32 *)(codeAddress + index - 4),     // Pointer to current instruction
  388.                                     0,                                         // PC adjustment
  389.                                     kStandardDisAsmOptions,                    // Disassembly option flags
  390.                                     mnemonic,                                 // Mnemonic string (to be filled in)
  391.                                     operand,                                 // Operand string (to be filled in)
  392.                                     comment,                                 // Comment string (to be filled in)
  393.                                     0,                                         // User ref num (base of assembly)
  394.                                     nil);                                    // Call-back function for symbol name look-ups
  395.  
  396.         srcCodeAddress = *(UInt32 *)(TestHarness);                                //    Copy in the prolog
  397.         srcCodeAddress = srcCodeAddress + kHarnessPrologSize;
  398.         codeAddress = codeAddress + index;
  399.  
  400.         BlockMove((void *)(srcCodeAddress), (void *)(codeAddress), kHarnessEpilogSize);
  401.         MakeDataExecutable((void *)(codeAddress), kHarnessEpilogSize);
  402.         
  403.         memset(&testState, 0, sizeof(testState));                                //    Clear the test state
  404.         
  405.         
  406.         testState.time.mmcr0_On = 0x42010044;                                    //    Setup the on value
  407.         testState.time.mmcr0_Off = 0xC2010044;                                    //    and the off value
  408.         testState.time.pmc1 = 0;                                                //    reset the counters
  409.         testState.time.pmc2 = 0;
  410.         
  411.         SR = MaskInterrupts();                                                    //    Turn off interrupts
  412.         
  413.         SetUpTestState(&testState);                                                //    Run the test three times
  414.         ExecuteTest(&testState, codePage);
  415.         SetUpTestState(&testState);
  416.         ExecuteTest(&testState, codePage);
  417.         SetUpTestState(&testState);
  418.         ExecuteTest(&testState, codePage);
  419.         
  420.         RestoreInterrupts(SR);                                                    //    Restore interrupts
  421.         
  422.         testState.time.pmc1 = testState.time.pmc1 - 11;                            //    Subtract off the overhead
  423.         testState.time.pmc2 = testState.time.pmc2 -  4;    
  424.         
  425.         if (testState.time.pmc1 != lastCycle)
  426.         {
  427.             printf("\n");
  428.             if ((testState.time.pmc1 - lastCycle) != 1)
  429.                 printf("Stall!\n\n");
  430.             lastCycle = testState.time.pmc1;
  431.         }
  432.         printf("Cycles: %3ld, Count: %3ld %s %s %s\n", testState.time.pmc1, testState.time.pmc2, mnemonic, operand, comment);
  433.     }
  434.     
  435.     if (kDUMP_END_STATE)
  436.     {
  437.         UInt32    count;
  438.         UInt32    *gpr;
  439.         
  440.         printf("\nLR    0x%08X\n", testState.end_machine.LR);
  441.         printf("CTR    0x%08X\n", testState.end_machine.CTR);
  442.         printf("CR    0x%08X\n", testState.end_machine.CR);
  443.         printf("XER    0x%08X\n\n", testState.end_machine.XER);
  444.         
  445.         gpr = &(testState.end_gpr.R0);
  446.         for (count = 0; count < 32; count++)
  447.             printf("r%ld    0x%08X\n", count, gpr[count]);
  448.     }
  449.     
  450.     printf("\n----Test End----\n");
  451.     
  452.     idpc = (double)(testState.time.pmc2) / (double)(testState.time.pmc1);    //    Compute Inst. Dispatched per Cycle
  453.     printf("\nInstructions Dispatched per Cycle:    %f\n", idpc);
  454.     
  455.     DisposePtr(codePagePtr);
  456.     DisposePtr(dataPagePtr);
  457. }
  458.  
  459.  
  460. /*------------------------------------------------------------------
  461.     ExecuteTest    [internal]
  462.     
  463.     Glue code to jump to the TestHarness.
           
           r3    pointer to the TimingState.  It is not touched here, so the
                 code at r4 receives it in r3 (TestHarness starts by copying
                 r3 into r10).
           r4    address of the code to run; main() passes the page-aligned
                 buffer holding prolog + test code + epilog.
           
           The jump goes through CTR (mtctr/bctr) and does not set LR, so LR
           still holds this routine's return address.  The harness saves LR
           on entry and reloads it before its final blr, which therefore
           returns straight to the caller of ExecuteTest.
  464. ------------------------------------------------------------------*/
  465.  
  466. asm
  467. void
  468. ExecuteTest(TimingState *r3, UInt32 *r4)
  469. {
  470.     mtctr    r4                    //    CTR = address of the assembled test
  471.     bctr                        //    jump (no link); r3 still holds the TimingState pointer
  472. }
  473.  
  474.  
  475. /*------------------------------------------------------------------
  476.     TestHarness    [internal]
  477.     
  478.     This code contains the prolog and epilog that are added to the
  479.     test code for each run.  It saves the current register state,
  480.     loads the user specified registers, and runs the test.  It then
  481.     saves the end register state and restores the registers to
  482.     their initial values.
           
           This routine is not run in place: main() copies the bytes before
           @beginInst (kHarnessPrologSize), then the test instructions, then
           the bytes from @beginInst onward (kHarnessEpilogSize) into a
           buffer, and ExecuteTest jumps to that copy.
           
           Register use:    r3 on entry = TimingState pointer, kept in r10
                            r11 = mmcr0_On, r12 = mmcr0_Off
           SPRs:            952 receives the mmcr0 on/off values,
                            953 / 954 are read back as pmc1 / pmc2, 1 = XER
           
           Every displacement is a byte offset into TimingState:
               0 time, 16 machine, 32 gpr, 160 fpr, 416 save_machine,
               432 save_gpr, 560 save_fpr, 816 end_machine, 832 end_gpr
           
           The first number in the prolog comments is the byte offset of the
           instruction from the start of the routine.  The prolog must stay
           exactly 480 bytes (120 instructions) to match kHarnessPrologSize;
           the nops are padding that keeps it that size.
  483. ------------------------------------------------------------------*/
  484.  
  485. asm
  486. void
  487. TestHarness(void)
  488. {
  489.     mr        r10, r3                //    0    //    Put the pointer to our state in r10
  490.                                         //    Save the machine state (into save_machine)
  491.     mflr    r3                    //    4    //    Save the LR
  492.     stw        r3, 416(r10)        //    8    
  493.     mfctr    r3                    //    12    //    Save the CTR
  494.     stw        r3, 4 + 416(r10)    //    16
  495.     mfcr    r3                    //    20    //    Save the CR
  496.     stw        r3, 8 + 416(r10)    //    24    
  497.     mfspr    r3, 1                //    28    //    Save the XER
  498.     stw        r3, 12 + 416(r10)    //    32    
  499.     
  500.     lwz        r11, 0(r10)            //    36    //    Get the on command
  501.     lwz        r12, 4(r10)            //    40    //    Get the off command
  502.     
  503.     stw        r10, 32 + 40(r10)    //    44    //    Save r10-r12 in the test state (gpr slots 10-12),
  504.     stw        r11, 32 + 44(r10)    //    48    //    so the lmw below reloads them unchanged
  505.     stw        r12, 32 + 48(r10)    //    52
  506.     
  507.     stmw    r0, 432(r10)        //    56    //    Save the GPRs (into save_gpr)
  508.     
  509.     addi    r3, r10, 552        //    60    //    Save the FPU state; each stfdu pre-increments r3 by 8, so fp0 lands at 560 (save_fpr)
  510.     stfdu    fp0, 8(r3)            //    64
  511.     stfdu    fp1, 8(r3)
  512.     stfdu    fp2, 8(r3)
  513.     stfdu    fp3, 8(r3)
  514.     stfdu    fp4, 8(r3)
  515.     stfdu    fp5, 8(r3)
  516.     stfdu    fp6, 8(r3)
  517.     stfdu    fp7, 8(r3)
  518.     stfdu    fp8, 8(r3)
  519.     stfdu    fp9, 8(r3)
  520.     stfdu    fp10, 8(r3)
  521.     stfdu    fp11, 8(r3)
  522.     stfdu    fp12, 8(r3)
  523.     stfdu    fp13, 8(r3)
  524.     stfdu    fp14, 8(r3)
  525.     stfdu    fp15, 8(r3)
  526.     stfdu    fp16, 8(r3)
  527.     stfdu    fp17, 8(r3)
  528.     stfdu    fp18, 8(r3)
  529.     stfdu    fp19, 8(r3)
  530.     stfdu    fp20, 8(r3)
  531.     stfdu    fp21, 8(r3)
  532.     stfdu    fp22, 8(r3)
  533.     stfdu    fp23, 8(r3)
  534.     stfdu    fp24, 8(r3)
  535.     stfdu    fp25, 8(r3)
  536.     stfdu    fp26, 8(r3)
  537.     stfdu    fp27, 8(r3)
  538.     stfdu    fp28, 8(r3)
  539.     stfdu    fp29, 8(r3)
  540.     stfdu    fp30, 8(r3)
  541.     stfdu    fp31, 8(r3)            //    188
  542.     nop                            //    192    //    padding
  543.     nop                            //    196
  544.     nop                            //    200
  545.  
  546.     nop                            //    204
  547.     nop                            //    208
  548.     nop                            //    212
  549.     nop                            //    216
  550.  
  551.     mtspr    952, r12            //    220    //    Turn off the Performance Monitor
  552.     li        r3, 0                //    224    //    Clear PMC1 and PMC2
  553.     mtspr    953, r3                //    228
  554.     mtspr    954, r3                //    232
  555.     
  556.     addi    r3, r10, 152        //    236    //    Load the FPU state; fp0 comes from offset 160 (fpr)
  557.     lfdu    fp0, 8(r3)            //    240
  558.     lfdu    fp1, 8(r3)
  559.     lfdu    fp2, 8(r3)
  560.     lfdu    fp3, 8(r3)
  561.     lfdu    fp4, 8(r3)
  562.     lfdu    fp5, 8(r3)
  563.     lfdu    fp6, 8(r3)
  564.     lfdu    fp7, 8(r3)
  565.     lfdu    fp8, 8(r3)
  566.     lfdu    fp9, 8(r3)
  567.     lfdu    fp10, 8(r3)
  568.     lfdu    fp11, 8(r3)
  569.     lfdu    fp12, 8(r3)
  570.     lfdu    fp13, 8(r3)
  571.     lfdu    fp14, 8(r3)
  572.     lfdu    fp15, 8(r3)
  573.     lfdu    fp16, 8(r3)
  574.     lfdu    fp17, 8(r3)
  575.     lfdu    fp18, 8(r3)
  576.     lfdu    fp19, 8(r3)
  577.     lfdu    fp20, 8(r3)
  578.     lfdu    fp21, 8(r3)
  579.     lfdu    fp22, 8(r3)
  580.     lfdu    fp23, 8(r3)
  581.     lfdu    fp24, 8(r3)
  582.     lfdu    fp25, 8(r3)
  583.     lfdu    fp26, 8(r3)
  584.     lfdu    fp27, 8(r3)
  585.     lfdu    fp28, 8(r3)
  586.     lfdu    fp29, 8(r3)
  587.     lfdu    fp30, 8(r3)
  588.     lfdu    fp31, 8(r3)            //    364
  589.     nop                            //    368    //    padding
  590.     nop                            //    372
  591.     nop                            //    376
  592.  
  593.     nop                            //    380
  594.     nop                            //    384
  595.     nop                            //    388
  596.     nop                            //    392
  597.         
  598.     lwz        r3, 0 + 16(r10)        //    396    //    Load the LR (from machine)
  599.     mtlr    r3                    //    400
  600.     lwz        r3, 4 + 16(r10)        //    404    //    Load the CTR
  601.     mtctr    r3                    //    408
  602.     lwz        r3, 8 + 16(r10)        //    412    //    Load the CR
  603.     mtcrf    0xFF, r3            //    416
  604.     lwz        r3, 12 + 16(r10)    //    420    //    Load the XER
  605.     mtspr    1, r3                //    424    //
  606.     
  607.     mr        r2, r10                //    428    //    Use r2 as the base while r3-r31 (including r10) are reloaded
  608.     lmw        r3, 12 + 32 (r2)    //    432    //    Load the GPRs r3-r31 (from gpr)
  609.     lwz        r0, 0 + 32 (r2)        //    436
  610.     nop                            //    440    //    Don't load the Stack!  (the slot at offset 4 is skipped)
  611.     lwz        r2, 8 + 32 (r2)        //    444    //    r2 is loaded last because it is the base register
  612.     
  613.     nop                            //    448    //    Some NOPs so the first test instruction (offset 480) starts a cache line
  614.     nop                            //    452
  615.     cror 0,0,0                    //    456    //    dispatch synchronize
  616.     sync                        //    460    //    Synchronize the processor
  617.     isync                        //    464    //    
  618.     mtspr    952, r11            //    468    //    Start the counters
  619.     b @beginInst                //    472    //    branch to the first instruction
  620.     nop                            //    476    //    Place holder
  621. @beginInst                        //    480    //    end of prolog; main() splices the test instructions in here
  622. /*    Put test here.    */    
  623.     
  624. /*    End test here.    */
       //    Epilog.  NOTE(review): from this isync to the blr there are 69
       //    instructions (276 bytes), while kHarnessEpilogSize is 296 -- main()
       //    appears to copy 20 bytes beyond the blr; confirm that is harmless.
  625.     isync                        //    Synchronize
  626.         
  627.     mtspr    952, r12            //    Stop the counters
  628.     stmw    r0, 832(r10)        //    Save the end GPRs (into end_gpr); r10 still holds the state pointer reloaded by the prolog
  629.     
  630.     mfspr    r4, 953                //    Get PMC1 and PMC2
  631.     mfspr    r5, 954
  632.     stw        r4, 8(r10)            //    Save them in the timing state (time.pmc1)
  633.     stw        r5, 12(r10)            //    (time.pmc2)
  634.     
  635.     mflr    r3                    //    Save the LR (into end_machine)
  636.     stw        r3, 816(r10)        
  637.     mfctr    r3                    //    Save the CTR
  638.     stw        r3, 4 + 816(r10)    
  639.     mfcr    r3                    //    Save the CR
  640.     stw        r3, 8 + 816(r10)    
  641.     mfspr    r3, 1                //    Save the XER
  642.     stw        r3, 12 + 816(r10)    
  643.  
  644.     lwz        r3, 0 + 416(r10)    //    Load the LR (caller's value, from save_machine)
  645.     mtlr    r3                    
  646.     lwz        r3, 4 + 416(r10)    //    Load the CTR
  647.     mtctr    r3                    
  648.     lwz        r3, 8 + 416(r10)    //    Load the CR
  649.     mtcrf    0xFF, r3            
  650.     lwz        r3, 12 + 416(r10)    //    Load the XER
  651.     mtspr    1, r3                
  652.  
  653.     addi    r3, r10, 552        //    Load the FPU state (caller's values, from save_fpr at 560)
  654.     lfdu    fp0, 8(r3)            
  655.     lfdu    fp1, 8(r3)
  656.     lfdu    fp2, 8(r3)
  657.     lfdu    fp3, 8(r3)
  658.     lfdu    fp4, 8(r3)
  659.     lfdu    fp5, 8(r3)
  660.     lfdu    fp6, 8(r3)
  661.     lfdu    fp7, 8(r3)
  662.     lfdu    fp8, 8(r3)
  663.     lfdu    fp9, 8(r3)
  664.     lfdu    fp10, 8(r3)
  665.     lfdu    fp11, 8(r3)
  666.     lfdu    fp12, 8(r3)
  667.     lfdu    fp13, 8(r3)
  668.     lfdu    fp14, 8(r3)
  669.     lfdu    fp15, 8(r3)
  670.     lfdu    fp16, 8(r3)
  671.     lfdu    fp17, 8(r3)
  672.     lfdu    fp18, 8(r3)
  673.     lfdu    fp19, 8(r3)
  674.     lfdu    fp20, 8(r3)
  675.     lfdu    fp21, 8(r3)
  676.     lfdu    fp22, 8(r3)
  677.     lfdu    fp23, 8(r3)
  678.     lfdu    fp24, 8(r3)
  679.     lfdu    fp25, 8(r3)
  680.     lfdu    fp26, 8(r3)
  681.     lfdu    fp27, 8(r3)
  682.     lfdu    fp28, 8(r3)
  683.     lfdu    fp29, 8(r3)
  684.     lfdu    fp30, 8(r3)
  685.     lfdu    fp31, 8(r3)
  686.     nop                            
  687.     nop                            
  688.     nop                            
  689.  
  690.     nop                            
  691.     nop                            
  692.     nop                            
  693.     nop                            //    the FPU reload group (addi + 32 lfdu + 7 nop) is 160 bytes long
  694.  
  695.     mr        r2, r10                //    Use r2 as the base while r3-r31 (including r10) are reloaded
  696.     lmw        r3, 12 + 432(r2)    //    Load the GPRs (caller's values, from save_gpr)
  697.     lwz        r0, 0 + 432 (r2)    
  698.     nop                            //    Don't load the stack
  699.     lwz        r2, 8 + 432 (r2)    //    r2 last, because it is the base register
  700.  
  701.     blr                            //    return through the caller's LR restored above
  702. }
  703.  
  704.  
  705.  
  706. /*------------------------------------------------------------------
  707.     InterruptsCode    [internal]
  708.     
  709.     This code is used to turn on and off 68K interrupts.
  710. ------------------------------------------------------------------*/
  711.  
  712. static UInt16 MaskInterruptsCode[] =
  713.   {
  714.     0x40C0,                // MOVE SR, D0
  715.     0x007C, 0x0700,        // ORI.W #$0700, SR
  716.     0x4E75                // RTS
  717.   };
  718.  
  719.  
  720. /* ProcInfo record for MaskInterrupts call */
  721. enum {
  722.     uppMaskInterruptsProcInfo = kRegisterBased |
  723.         RESULT_SIZE(kFourByteCode) |
  724.         REGISTER_RESULT_LOCATION(kRegisterD0)
  725. };
  726.  
  727. static UInt16 RestoreInterruptsCode[] =
  728.   {
  729.     0x46C0,                // MOVE D0, SR
  730.     0x4E75                // RTS
  731.   };
  732.  
  733. /* ProcInfo record for RestoreInterrupts call */
  734. enum {
  735.     uppRestoreInterruptsProcInfo = kRegisterBased |
  736.     REGISTER_ROUTINE_PARAMETER(1,kRegisterD0,kFourByteCode)
  737. };
  738.  
  739.  
  740.  
  741. UInt32
  742. MaskInterrupts(void) {
  743.     return(CallUniversalProc((UniversalProcPtr)(&MaskInterruptsCode), uppMaskInterruptsProcInfo));
  744. }
  745.  
  746.  
  747. void
  748. RestoreInterrupts(UInt32 oldSR) {
  749.     CallUniversalProc((UniversalProcPtr)(&RestoreInterruptsCode), uppRestoreInterruptsProcInfo, oldSR);
  750. }
  751.  
  752.  
  753.